.libPaths(c("/samurlab1/Joshua/more_Rlib/", .libPaths()))
# Mature Neutrophil differential analysis 
# will compare the 3 mature neutrophil clusters in single cell RNA-seq dataset 
# replot in low dim space using HVF 
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(clusterProfiler)
## 
## clusterProfiler v4.12.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan,
## X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal
## enrichment tool for interpreting omics data. The Innovation. 2021,
## 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
## 
##     filter
library(org.Hs.eg.db) 
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:SeuratObject':
## 
##     intersect
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
##     tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
## 
##     rename
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
## 
##     slice
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:sp':
## 
##     %over%
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## 
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%()     masks IRanges::%within%()
## ✖ IRanges::collapse()       masks dplyr::collapse()
## ✖ Biobase::combine()        masks BiocGenerics::combine(), dplyr::combine()
## ✖ IRanges::desc()           masks dplyr::desc()
## ✖ tidyr::expand()           masks S4Vectors::expand()
## ✖ clusterProfiler::filter() masks dplyr::filter(), stats::filter()
## ✖ S4Vectors::first()        masks dplyr::first()
## ✖ dplyr::lag()              masks stats::lag()
## ✖ ggplot2::Position()       masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()           masks IRanges::reduce()
## ✖ S4Vectors::rename()       masks clusterProfiler::rename(), dplyr::rename()
## ✖ lubridate::second()       masks S4Vectors::second()
## ✖ lubridate::second<-()     masks S4Vectors::second<-()
## ✖ AnnotationDbi::select()   masks clusterProfiler::select(), dplyr::select()
## ✖ purrr::simplify()         masks clusterProfiler::simplify()
## ✖ IRanges::slice()          masks clusterProfiler::slice(), dplyr::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## 
## The following object is masked from 'package:IRanges':
## 
##     slice
## 
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(monocle3)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## 
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## 
## Attaching package: 'MatrixGenerics'
## 
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## 
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## 
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'SummarizedExperiment'
## 
## The following object is masked from 'package:Seurat':
## 
##     Assays
## 
## The following object is masked from 'package:SeuratObject':
## 
##     Assays
## 
## 
## Attaching package: 'monocle3'
## 
## The following objects are masked from 'package:Biobase':
## 
##     exprs, fData, fData<-, pData, pData<-
library(SeuratWrappers)
library(ggplot2)
library(EnhancedVolcano)
## Loading required package: ggrepel

Pre-Neutrophil scoring. We must convert the list of pre-neutrophil signature from mouse transcripts to huma using this script below.

#neutrophil single cell trajectory analysis
preneu.feats.mus = read.delim(file = "/samurlab1/Joshua/MM_scRNAseq/preneugenes.txt", sep ="")
preneu.feats.mus = preneu.feats.mus$x

#currently, features are in mouse symbol annotation, if we use as is it will coerce an error
#this function uses ensemble annotated features to convert gene names. 
musGenes = preneu.feats.mus

mouse_human_genes = read.csv("http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",sep="\t")

convert_mouse_to_human <- function(gene_list){
  
  output = c()
  
  for(gene in gene_list){
    class_key = (mouse_human_genes %>% filter(Symbol == gene & Common.Organism.Name=="mouse, laboratory"))[['DB.Class.Key']]
    if(!identical(class_key, integer(0)) ){
      human_genes = (mouse_human_genes %>% filter(DB.Class.Key == class_key & Common.Organism.Name=="human"))[,"Symbol"]
      for(human_gene in human_genes){
        output = append(output,human_gene)
      }
    }
  }
  
  return (output)
}

preneu.homo = convert_mouse_to_human(as.vector(preneu.feats.mus))

neuts = AddModuleScore(neuts, features = list(preneu.homo), name = "preneu.sig")
## Warning: The following features are not present in the object: F10, ELDR,
## HSPB6, XKR5, not searching for symbol synonyms
FeaturePlot(neuts, features = "preneu.sig1", cols = c("steelblue1","firebrick3"), pt.size  = 0.1)

FeaturePlot(neuts, features = "preneu.sig1", split.by="tissue" ,cols = c("steelblue1","firebrick3"), pt.size  = 0.1)

## selecting .1 as a stringent cut off for origin local in minimum spanning tree

neuts@reductions$umap@cell.embeddings = neuts@reductions$umap@cell.embeddings[,c(1,2)]
neuts$predicted = NULL
## Warning: Cannot find cell-level meta data named predicted
hist(neuts$preneu.sig1)

neuts$stem = neuts$preneu.sig1 > .1
# selecting .1 as a stringent cut off for origin local in minimum spanning tree
stemcells = colnames(neuts[,neuts$stem == TRUE])

####monocle3 for sc trajectories####
neuts.cds = as.cell_data_set(neuts)
## Warning: Monocle 3 trajectories require cluster partitions, which Seurat does
## not calculate. Please run 'cluster_cells' on your cell_data_set object
neuts.cds@rowRanges@elementMetadata@listData[["gene_short_name"]] = rownames(neuts.cds[["SCT"]])
neuts.cds = preprocess_cds(neuts.cds, num_dim = 50)
neuts.cds = cluster_cells(neuts.cds)
neuts.cds = learn_graph(neuts.cds)
##   |                                                                              |                                                                      |   0%  |                                                                              |======================================================================| 100%
neuts.cds <- order_cells(neuts.cds, root_cells = stemcells)

# neuts.cds_pr_test_res = graph_test(neuts.cds, neighbor_graph="principal_graph", cores=64

plot_cells(neuts.cds,
           color_cells_by = "pseudotime",
           label_groups_by_cluster=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE,
           show_trajectory_graph = TRUE, label_principal_points = F, label_roots = T)
## Cells aren't colored in a way that allows them to be grouped.

Supplemental Figure 2C T1, T2, and T3 signature comparison from Science. 2024;383(6679):eadf6493. doi:10.1126/science.adf6493

#T1 signature####
neuts = AddModuleScore(neuts, features =list(c("CXCR2","CD300LD","DUSP1","GBP2","IFITM1","IL1B","ISG15",
                                               "JAML","JUNB","MSRB1","OSM","S100A6","SELPLG","SLPI")),
                       name = "T1_Signature")



t1 = FeaturePlot(neuts, features = "T1_Signature1",
                 # split.by = "tissue",
                 cols = c("steelblue3","red"))
t1

t1 = VlnPlot(neuts, features = "T1_Signature1",
             cols = patient_cols,
             split.plot = T,
             split.by = "tissue", 
             pt.size = 0)
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.
order = c("Mki67+DEFA3+pre-neut","S100A8+LTF+imtr_neut","LTF+CAMP+imtr_neut","S100A8/9+MMP9+imtr_neut","RETN+LCN2+mat_neut","TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut")
factor(t1$data$ident, levels=order)
t1$data$ident = factor(t1$data$ident, levels=order)
t1

#T2 signature####
neuts = AddModuleScore(neuts, features =list(c("LTC4S","MMP8","MMP9","PPIA","PRR13","PTMA","RETNLG",
                                               "TNFAIP3","IRAK3","CYBB","CCRL2","OLR1","CLEC6A")),
                       name = "T2_Signature")
## Warning: The following features are not present in the object: RETNLG, not
## searching for symbol synonyms
t2 = FeaturePlot(neuts, features = "T2_Signature1",
                 # split.by = "tissue",
                 cols = c("steelblue3","red"))
t2

t2 = VlnPlot(neuts, features = "T2_Signature1",
             cols = patient_cols,
             split.plot = T,
             split.by = "tissue", 
             pt.size = 0)

order = c("Mki67+DEFA3+pre-neut","S100A8+LTF+imtr_neut","LTF+CAMP+imtr_neut","S100A8/9+MMP9+imtr_neut","RETN+LCN2+mat_neut","TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut")
factor(t2$data$ident, levels=order)
t2$data$ident = factor(t2$data$ident, levels=order)
t2

#T3 signature####
neuts = AddModuleScore(neuts, features =list(c("ATF3","CCL3","CCL4","CD274","CSTB","CXCL3","HCAR2","HILPDA","HK2",
                                          "HMOX1","IER3","JUN","LDHA","MIF","PLIN2","SPP1","TGIF1","TNFRSF10D",
                                          "VEGFA","ZEB2")), name = "T3_Signature")



t3 = FeaturePlot(neuts, features = "T3_Signature1",
                 # split.by = "tissue",
                 cols = c("steelblue3","red"))
t3

t3 = VlnPlot(neuts, features = "T3_Signature1",
             cols = patient_cols,
             split.plot = T,
             split.by = "tissue", 
             pt.size = 0)

order = c("Mki67+DEFA3+pre-neut","S100A8+LTF+imtr_neut","LTF+CAMP+imtr_neut","S100A8/9+MMP9+imtr_neut","RETN+LCN2+mat_neut","TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut")
factor(t3$data$ident, levels=order)
t3$data$ident = factor(t3$data$ident, levels=order)
t3

Enhanced volcano plot using mature neutrophils derived from either focal lesion or bone marrow. Here, we seek to identify differences that will reveal specific transcripts beloning to mature neutrophils from either bone marrow or at site of osteolytic lesions.

neuts.mat.sub = subset(neuts, idents = c("TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut","RETN+LCN2+mat_neut"))
DimPlot(neuts.mat.sub)

mature.neut.norm = as.matrix(neuts.mat.sub$tissue == "BM")
# mature.neut.norm
# FALSE  TRUE 
# 23913 11935 
mature.neut.norm.T = mature.neut.norm[mature.neut.norm %in% TRUE,]
length(mature.neut.norm.T)
## [1] 11935
mature.neut.norm.barcodes = names(mature.neut.norm.T)
# 11935

mature.neut.lesion = as.matrix(neuts.mat.sub$tissue == "OL")
# table(mature.neut.lesion)
# FALSE  TRUE 
# 11935 23913 
mature.neut.lesion.T = mature.neut.lesion[mature.neut.lesion %in% TRUE,]
length(mature.neut.lesion.T)
## [1] 23913
mature.neut.lesion.barcodes = names(mature.neut.lesion.T)
# 23913
z = FindMarkers(neuts.mat.sub, ident.1 = mature.neut.lesion.barcodes, ident.2 = mature.neut.norm.barcodes,  min.pct = 0.1)
# View(z)

#pval sort 
z.ord = z[order(z$avg_log2FC,decreasing = T),]
head(z.ord)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## VEGFA   2.489244e-155   2.954315 0.115 0.032 5.176133e-151
## IER3    1.693558e-264   1.979016 0.322 0.172 3.521584e-260
## BHLHE40  1.461669e-88   1.729294 0.100 0.040  3.039394e-84
## NDRG1    1.714661e-89   1.638819 0.136 0.067  3.565465e-85
## MAFF    1.090737e-108   1.621894 0.113 0.043 2.268077e-104
## DDIT4   8.697295e-124   1.582138 0.152 0.067 1.808516e-119
EnhancedVolcano(z.ord, lab = rownames(z), x = "avg_log2FC", y = "p_val_adj",
                pCutoff = 10e-50, FCcutoff = 1.15, title = "OL left vs BM right")